#clear environment 
rm(list = ls())

library(readr)
library(readxl)
library(reshape2)
library(dplyr)
library(ggplot2)
library(tidyr)
library(abind)
library(zoo)
library(stringr)
library(tidylog)
library(stargazer)

# set directories
source("./Code/set_directories.R")

ACSdata <- read_csv(file.path(DATA.PROCESSED,"ACS2018_blockgroup.csv"), 
                    col_types = cols(.default = "d", GEOID = "c", GeoLabel = "c")) %>% 
  select(-X1)

#Merge ACS Data with Census Region Names
census.regions <- read_csv(file.path(DATA.PATH,"regiondivision.csv"))
names(census.regions)[1] <- "stateFIPS"
ACSdata$stateFIPS <- substr(ACSdata$GEOID,start = 1,stop = 2)
ACSdata <- left_join(ACSdata,census.regions,by = "stateFIPS")

# Zips and Tracts
tracts <- read.csv(file.path(DATA.PATH,"zcta_tract_rel_10.txt"), sep=",", header = TRUE, colClasses=c("ZCTA5"="character","STATE"="character","COUNTY"="character","TRACT"="character"))
tracts$CountyFIPS <- paste(tracts$STATE,tracts$COUNTY,sep="")

# County FIPS
countyFIPS <- read.csv(file.path(DATA.PATH,"countyFIPS.txt"))
countyFIPS <- array(unlist(countyFIPS),dim=c(3,3233))
countyFIPS <- as.data.frame(t(as.matrix(countyFIPS)))
countyFIPS <- countyFIPS[-1,]
names(countyFIPS)[1] <- "CountyFIPS"
names(countyFIPS)[2] <- "CountyName"
names(countyFIPS)[3] <- "PostalCode"

#countyFIPS <- countyFIPS[,-3]
countyFIPS$CountyFIPS <- as.character(countyFIPS$CountyFIPS)
#change fips code for Miami-Dade (https://archive.epa.gov/ttn/ozone/web/html/miami.html)
countyFIPS <- countyFIPS %>% 
  mutate(CountyFIPS = if_else(CountyFIPS=="12025","12086",CountyFIPS))

# Merge CountyFIPS and Tracts
tracts <- left_join(x=tracts,y=countyFIPS,by="CountyFIPS")
tracts <- tracts %>% arrange(ZCTA5,desc(POPPT)) #sort by zip and then population (descending), so tract with greatest population within each zip is listed first

zips.fips.tracts <- subset(tracts, select = c("ZCTA5","GEOID","CountyFIPS","CountyName","PostalCode"))

#Collapse so that zip is only matched with one FIPS
collapsed.zips.fips.tracts <-  zips.fips.tracts[ !duplicated(zips.fips.tracts$ZCTA5), ]
names(collapsed.zips.fips.tracts)[1] <- "region"

# Read in system size count equivalent data
f_systems_data_clean <- file.path(DATA.PROCESSED,"systems_data_clean.rds")
systems_data_clean <- read_rds(f_systems_data_clean)

# Load environmental benefits/damages data
env.benefits.created <- readRDS(file.path(DATA.PROCESSED,"total_damages_zips.rds")) %>% 
  left_join(systems_data_clean %>% 
              select(zip, count_equiv), by=c("region"="zip")) %>% 
  rename(num_systems = count_equiv) %>% 
  replace_na(list(num_systems = 0)) %>% 
  left_join(read_csv(file.path(DATA.SOLAR.SITING,"Generation_and_damages_withCO2.csv")) %>% 
              mutate(zip = sprintf("%05d", zip)) %>% 
              select(zip, subsidy_value), by=c("region"="zip")) %>% 
  replace_na(list(subsidy_value = 0))

env.benefits.received <- readRDS(file.path(DATA.PROCESSED,"total_damages_fips.rds")) %>% 
  mutate(region = if_else(region=="12025","12086",region))

env.benefits.created <- left_join(env.benefits.created,collapsed.zips.fips.tracts,by="region")
####NEED TO DEAL WITH THE zips that weren't matched to counties**************
env.benefits.created.na <- subset(env.benefits.created,is.na(GEOID)==1) %>% 
  select(region,value,num_systems,subsidy_value)
env.benefits.created <- subset(env.benefits.created,is.na(GEOID)==0)

#now let's match the zips that weren't matched to counties to the closest zips in the collapsed.zips.fips.tracts file
env.benefits.created.na$region.num <- as.numeric(env.benefits.created.na$region)
collapsed.zips.fips.tracts$region.num <- as.numeric(collapsed.zips.fips.tracts$region)
library(purrr)
env.benefits.created.na.test = env.benefits.created.na %>% 
  dplyr::mutate(closestZip = purrr::map_dbl(.x=region.num,~collapsed.zips.fips.tracts$region.num[which.min(abs(.x-collapsed.zips.fips.tracts$region.num))])) %>% 
  dplyr::left_join(collapsed.zips.fips.tracts,by=c('closestZip'='region.num'))
env.benefits.created.na.test$zipdiff <- abs(env.benefits.created.na.test$region.num - env.benefits.created.na.test$closestZip)
env.benefits.created.na.test$region <- if_else(env.benefits.created.na.test$zipdiff<100,env.benefits.created.na.test$region.y,env.benefits.created.na.test$region.x)
env.benefits.created.na.test <- env.benefits.created.na.test %>% 
  select(region,value,num_systems,subsidy_value) %>% 
  left_join(collapsed.zips.fips.tracts,by="region") %>% 
  select(-region.num)
#append these to the env.benefits.created file
env.benefits.created <- rbind(env.benefits.created,env.benefits.created.na.test)

#sum benefits to the county level
env.benefits.created.county <- env.benefits.created %>% 
  group_by(CountyFIPS) %>% 
  summarize(value = sum(value,na.rm=TRUE),
            num_systems = sum(num_systems,na.rm=TRUE),
            subsidy_value = mean(subsidy_value,na.rm=TRUE)) %>% 
  rename(region = CountyFIPS)

#Plot map of benefits created by county
env.benefits.created.county$region <- as.numeric(env.benefits.created.county$region)
#library(choroplethr)
#library(choroplethrMaps)

#Plot map of benefits received by county
env.benefits.received$region <- as.numeric(env.benefits.received$region)

#Plot maps of benefits created and received using ggplot
library(maps)
MainStates <- map_data("state")
data("county.fips")
#Need to clean these county.fips...
#e.g., Currituck, NC, Miami-Dade, FL, etc. aren't merging right
county.fips <- county.fips %>% 
  #filter(fips!=30067 | polyname!="montana,park") %>% 
  #filter(!duplicated(fips)) %>% 
  mutate(polyname = sub("\\:.*", "", polyname))


AllCounty <- map_data("county")
AllCounty <- AllCounty %>% 
  mutate(polyname = paste(region,subregion,sep=",")) %>% 
  #drop Broomfield, CO, which is a new (as of 2001) construction from parts of three other counties
  #(https://www.ddorn.net/data/FIPS_County_Code_Changes.pdf)
  filter(polyname!="colorado,broomfield") %>% 
  left_join(county.fips,by="polyname")

tagsC <- c("0","(0 to 1,500)", "[1,500 to 3,000)", "[3,000 to 5,000)", "[5,000 to 10,000)", "[10,000 to 20,000)","[20,000 to 40,000)","[40,000 to 100,000)", "\u2265 100,000")
CREATED <- AllCounty %>% left_join(env.benefits.created.county,by=c("fips"="region")) %>% 
  mutate(tag = case_when(
    value <= 0 ~ tagsC[1],
    value > 0 & value <1500 ~ tagsC[2],
    value >= 1500 & value <3000 ~ tagsC[3],
    value >= 3000 & value <5000 ~ tagsC[4],
    value >= 5000 & value <10000 ~ tagsC[5],
    value >= 10000 & value <20000 ~ tagsC[6],
    value >= 20000 & value <40000 ~ tagsC[7],
    value >= 40000 & value <100000 ~ tagsC[8],
    value>=100000 ~ tagsC[9]
  ))
CREATED$tagf <- factor(CREATED$tag,levels=tagsC,ordered=FALSE)

#remove King County, TX (which our benefits.created data is missing because it contains only a PO Box zip code)
CREATED <- CREATED %>% 
  filter(fips!=48269)

tagsR <- c("\u2264 0","(0 to 1,500)", "[1,500 to 3,000)", "[3,000 to 5,000)", "[5,000 to 10,000)", "[10,000 to 20,000)","[20,000 to 40,000)","[40,000 to 100,000)", "\u2265 100,000")
RECEIVED <- AllCounty %>% left_join(env.benefits.received,by=c("fips"="region")) %>% 
  mutate(tag = case_when(
    value <= 0 ~ tagsR[1],
    value > 0 & value <1500 ~ tagsR[2],
    value >= 1500 & value <3000 ~ tagsR[3],
    value >= 3000 & value <5000 ~ tagsR[4],
    value >= 5000 & value <10000 ~ tagsR[5],
    value >= 10000 & value <20000 ~ tagsR[6],
    value >= 20000 & value <40000 ~ tagsR[7],
    value >= 40000 & value <100000 ~ tagsR[8],
    value>=100000 ~ tagsR[9]
  ))
RECEIVED$tagf <- factor(RECEIVED$tag,levels=tagsR,ordered=FALSE)

library(ggthemes)
CREATED_map <- ggplot() + geom_polygon( data=CREATED, aes(x=long, y=lat, group=group, fill=tagf),
                                        color="black", size = .1 ) + scale_fill_brewer()+ #scale_fill_viridis_d() +
  geom_polygon( data=MainStates, aes(x=long, y=lat, group=group),
                color="black", fill="white",   size = .5, alpha = 0) + 
  theme_map() + theme(legend.title = element_blank())

RECEIVED_map <- ggplot() + geom_polygon( data=RECEIVED, aes(x=long, y=lat, group=group, fill=tagf),
                                         color="black", size = .1 ) + scale_fill_brewer()+ #scale_fill_viridis_d() +
  geom_polygon( data=MainStates, aes(x=long, y=lat, group=group),
                color="black", fill="white",   size = .5, alpha = 0) + 
  theme_map() + theme(legend.title = element_blank())

cairo_pdf(file.path(FIGURES.OUT, 'Total_benefits_created_by_county.pdf'), width=10, height=6)
CREATED_map
dev.off()

cairo_pdf(file.path(FIGURES.OUT, 'Total_benefits_received_by_county.pdf'), width=10, height=6)
RECEIVED_map
dev.off()

# Get population counts by county
ACSdata$CountyFIPS <- substr(ACSdata$GEOID,start = 1,stop = 5)
pop.county <- aggregate(Number.POP ~ CountyFIPS, data = ACSdata, FUN = sum)

# Merge these population counts with env benefits received
env.benefits.received <- readRDS(file.path(DATA.PROCESSED,"total_damages_fips.rds"))
env.benefits.received.per.cap <- left_join(pop.county,env.benefits.received,by=c("CountyFIPS" = "region"))
env.benefits.received.per.cap$env.ben.PC <- env.benefits.received.per.cap$value/env.benefits.received.per.cap$Number.POP

# Merge population counts with env benefits created
env.benefits.created.county$county <- sprintf("%05d", env.benefits.created.county$region)
env.benefits.created.per.cap <- left_join(pop.county,env.benefits.created.county,by=c("CountyFIPS" = "county"))
env.benefits.created.per.cap$env.ben.created.PC <- env.benefits.created.per.cap$value/env.benefits.created.per.cap$Number.POP
env.benefits.created.per.cap$num_systems.PC <- env.benefits.created.per.cap$num_systems/env.benefits.created.per.cap$Number.POP
env.benefits.created.per.cap <- subset(env.benefits.created.per.cap, select = c(CountyFIPS,env.ben.created.PC,num_systems.PC,subsidy_value))
env.benefits <- left_join(env.benefits.received.per.cap,env.benefits.created.per.cap,by="CountyFIPS")

# Merge per capita benefits with ACS data
ACSdata.env <- left_join(ACSdata,env.benefits,by="CountyFIPS")
ACSdata.env <- subset(ACSdata.env, select = -c(Number.POP.y,value)) #drop the county population column and drop the county level benefits (value)
ACSdata.env <- subset(ACSdata.env,stateFIPS!="02") #drop AK
ACSdata.env <- subset(ACSdata.env,stateFIPS!="15") #drop HI
ACSdata.env <- subset(ACSdata.env,stateFIPS!="60"&stateFIPS!="66"&stateFIPS!="69"&stateFIPS!="72"&stateFIPS!="78") #drop Guam, NMI, PR, VI

sum(is.na(ACSdata.env$env.ben.PC)) #counts number of NAs in env.ben.PC column
#********Let's drop these for now. Need to fix this later.*************#
whatsmissing <- subset(ACSdata.env,is.na(env.ben.PC)==1)
ACSdata.env <- subset(ACSdata.env,is.na(env.ben.PC)==0)

#Drop if missing Median Household Income (MedianHH)
ACSdata.env$Median.HH <- as.numeric(ACSdata.env$Median.HH)
ACSdata.env <- subset(ACSdata.env,is.na(Median.HH)==0)

#Calculate total benefits in each Census block group
ACSdata.env$env.ben <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.x
ACSdata.env$num.systems <- ACSdata.env$num_systems.PC*ACSdata.env$Number.POP.x
total.income <- sum(ACSdata.env$Median.HH*ACSdata.env$Number.HH)

#Plot Lorenz curves (Figure 3)
#order ACSdata by MedianHH income (i.e., rank individuals by their income)
ACSdata.env.inc.sort <- ACSdata.env[order(ACSdata.env$Median.HH),]
ACSdata.env.inc.sort$cumPop <- cumsum(ACSdata.env.inc.sort$Number.POP.x)/max(cumsum(ACSdata.env.inc.sort$Number.POP.x))
#ACSdata.env.inc.sort$cumInc <- cumsum(ACSdata.env.inc.sort$Median.HH)/max(cumsum(ACSdata.env.inc.sort$Median.HH))
ACSdata.env.inc.sort$cumInc <- cumsum(ACSdata.env.inc.sort$Median.HH*ACSdata.env.inc.sort$Number.HH)/max(cumsum(ACSdata.env.inc.sort$Median.HH*ACSdata.env.inc.sort$Number.HH))

#order ACSdata by env.ben.PC (i.e., rank census tracts by per capita environmental benefits)
ACSdata.env.ben.sort <- ACSdata.env[order(ACSdata.env$env.ben.PC),]
ACSdata.env.ben.sort$cumPop <- cumsum(ACSdata.env.ben.sort$Number.POP.x)/max(cumsum(ACSdata.env.ben.sort$Number.POP.x))
ACSdata.env.ben.sort$cumBen <- cumsum(ACSdata.env.ben.sort$env.ben)/max(cumsum(ACSdata.env.ben.sort$env.ben))

pdf(file.path(FIGURES.OUT, 'Lorenz_curves.pdf'), width=8, height=4)
ggplot() + 
  geom_line(data=ACSdata.env.inc.sort, aes(x=cumPop, y=cumInc, color="Median Income")) + 
  geom_line(data=ACSdata.env.ben.sort, aes(x=cumPop, y=cumBen, color="Environmental Benefits")) +
  geom_segment(aes(x=0,y=0,xend=1,yend=1)) +
  scale_color_manual("",
                     breaks = c("Median Income","Environmental Benefits"),
                     values = c("blue","cyan")) +
  xlab("Cumulative fraction of population") + ylab("Cumulative fraction") +
  theme_minimal() + 
  theme(legend.position = c(0.2,0.8), legend.title = element_blank(),legend.background = element_blank(),legend.box.background = element_rect(color = "black"))
dev.off()

# To plot Figure 4, export data to Stata to use lpoly function
library(foreign)
write.dta(ACSdata.env,file.path(DATA.PROCESSED,"ACSdata_env.dta"))

# Create Table 3 (Env Ben per Cap by Race and Income)
ACSdata.env$decile <- with(ACSdata.env, cut(Median.HH, 
                                breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                include.lowest=TRUE))
ACSdata.env$decile <- as.numeric(ACSdata.env$decile)
test <- subset(ACSdata.env,decile==2)
test.out <- sum(test$env.ben)/sum(test$Number.POP.x)

table3.stats <- ACSdata.env %>% 
  group_by(decile) %>% 
  summarize(wt.avg = sum(env.ben)/sum(Number.POP.x),
            wt.avg.black = sum(env.ben.PC*Number.POP.black)/sum(Number.POP.black),
            wt.avg.latino = sum(env.ben.PC*Number.POP.latino)/sum(Number.POP.latino),
            wt.avg.asian = sum(env.ben.PC*Number.POP.asian)/sum(Number.POP.asian),
            wt.avg.whitealone = sum(env.ben.PC*Number.POP.whitealone)/sum(Number.POP.whitealone))
table3.stats <- table3.stats[,c(1,3:6,2)]
table3.totalrow.black <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.black)/sum(ACSdata.env$Number.POP.black),digits=3)
table3.totalrow.latino <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.latino)/sum(ACSdata.env$Number.POP.latino),digits=3)
table3.totalrow.asian <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.asian)/sum(ACSdata.env$Number.POP.asian),digits=3)
table3.totalrow.whitealone <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.whitealone)/sum(ACSdata.env$Number.POP.whitealone),digits=3)
table3.totalrow.total <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.x)/sum(ACSdata.env$Number.POP.x),digits=3)

table3.stats <- rbind(table3.stats,c("Total",table3.totalrow.black,table3.totalrow.latino,table3.totalrow.asian,table3.totalrow.whitealone,table3.totalrow.total))
table3.stats$wt.avg.black <- round(as.numeric(table3.stats$wt.avg.black),digits=3)
table3.stats$wt.avg.latino <- round(as.numeric(table3.stats$wt.avg.latino),digits=3)
table3.stats$wt.avg.asian <- round(as.numeric(table3.stats$wt.avg.asian),digits=3)
table3.stats$wt.avg.whitealone <- round(as.numeric(table3.stats$wt.avg.whitealone),digits=3)
table3.stats$wt.avg <- round(as.numeric(table3.stats$wt.avg),digits=3)
colnames(table3.stats) <- c("Income Decile","Black","Hispanic/Latinx","Asian","White","All")
#Print income decile and race table to file
byLine <- do.call("c", strsplit(capture.output(stargazer(table3.stats, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- append(byLine,c(" & \\multicolumn{5}{c}{Demographic Group} \\\\ \\cmidrule{2-6}"), after = c(4))
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"tab_deciles_races.tex"))


# Do univariate regressions (in Stata using data created here) to create Table A.3
ACSdata.env$Median.HH.10k <- ACSdata.env$Median.HH/10000
ACSdata.env$share.pop.black <- ACSdata.env$Number.POP.black/ACSdata.env$Number.POP.x
ACSdata.env$share.pop.latino <- ACSdata.env$Number.POP.latino/ACSdata.env$Number.POP.x
ACSdata.env$share.pop.asian <- ACSdata.env$Number.POP.asian/ACSdata.env$Number.POP.x
ACSdata.env$share.pop.whitealone <- ACSdata.env$Number.POP.whitealone/ACSdata.env$Number.POP.x
#ACSdata.env$share.urban <- (ACSdata.env$Total.Percent.UA.Pop+ACSdata.env$Total.Percent.UC.Pop)/100

ACSdata.env$weights <- ACSdata.env$Number.POP.x/sum(ACSdata.env$Number.POP.x)
write.dta(ACSdata.env,file.path(DATA.PROCESSED,"ACSdata_env.dta"))


# Create tables of benefits created/received by region and division
env.benefits.received$stateFIPS <- substr(env.benefits.received$region,start = 1,stop = 2)
env.benefits.received <- left_join(env.benefits.received,census.regions,by = "stateFIPS")

env.benefits.created.county$county <- sprintf("%05d", env.benefits.created.county$region)
env.benefits.created.county$stateFIPS <- substr(env.benefits.created.county$county,start = 1,stop = 2)
env.benefits.created.county <- left_join(env.benefits.created.county,census.regions,by = "stateFIPS")

ben.received.region <- env.benefits.received %>% 
  group_by(RegionName) %>% 
  summarize(ben.received = sum(value))

ben.created.region <- env.benefits.created.county %>% 
  group_by(RegionName) %>% 
  summarize(ben.created = sum(value))

region.benefits <- left_join(ben.created.region,ben.received.region,by="RegionName")
colnames(region.benefits) <- c("Region","Created (millions $)","Received (millions $)")
region.benefits$`Created (millions $)` <- round(region.benefits$`Created (millions $)`/1000000,1)
region.benefits$`Received (millions $)` <- round(region.benefits$`Received (millions $)`/1000000,1)
#Write benefits by region to Latex file
byLine <- do.call("c", strsplit(capture.output(stargazer(region.benefits, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- sub(" ccc", " lcc", byLine)
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"region_benefits.tex"))

#Calculate benefits created and received by Census Division
ben.received.division <- env.benefits.received %>% 
  group_by(DivisionName) %>% 
  summarize(ben.received = sum(value))

ben.created.division <- env.benefits.created.county %>% 
  group_by(DivisionName) %>% 
  summarize(ben.created = sum(value))

division.benefits <- left_join(ben.created.division,ben.received.division,by="DivisionName")
colnames(division.benefits) <- c("Division","Created (millions $)","Received (millions $)")
division.benefits$`Created (millions $)` <- round(division.benefits$`Created (millions $)`/1000000,1)
division.benefits$`Received (millions $)` <- round(division.benefits$`Received (millions $)`/1000000,1)
#Write benefits by division to Latex file
byLine <- do.call("c", strsplit(capture.output(stargazer(division.benefits, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- sub(" ccc", " lcc", byLine)
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"division_benefits.tex"))

#Calculate benefits that accrue to different demographic groups
ACSdata.env$env.ben.black <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.black
ACSdata.env$env.ben.latino <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.latino
ACSdata.env$env.ben.asian <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.asian
ACSdata.env$env.ben.whitealone <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.whitealone

#Calculate benefits that are created by different demographic groups
ACSdata.env$env.ben.created.black <- ACSdata.env$env.ben.created.PC*ACSdata.env$Number.POP.black
ACSdata.env$env.ben.created.latino <- ACSdata.env$env.ben.created.PC*ACSdata.env$Number.POP.latino
ACSdata.env$env.ben.created.asian <- ACSdata.env$env.ben.created.PC*ACSdata.env$Number.POP.asian
ACSdata.env$env.ben.created.whitealone <- ACSdata.env$env.ben.created.PC*ACSdata.env$Number.POP.whitealone

total.benefit.shares <- as.data.frame((c("Black","Hispanic/Latinx","Asian","White")))
total.benefits <- sum(env.benefits.received$value)
total.benefit.shares$share.created <- c(sum(ACSdata.env$env.ben.created.black,na.rm = T)/total.benefits,sum(ACSdata.env$env.ben.created.latino,na.rm = T)/total.benefits,sum(ACSdata.env$env.ben.created.asian,na.rm = T)/total.benefits,sum(ACSdata.env$env.ben.created.whitealone,na.rm = T)/total.benefits)
total.benefit.shares$share.received <- c(sum(ACSdata.env$env.ben.black)/total.benefits,sum(ACSdata.env$env.ben.latino)/total.benefits,sum(ACSdata.env$env.ben.asian)/total.benefits,sum(ACSdata.env$env.ben.whitealone)/total.benefits)
total.benefit.shares$pc.created  <- c(sum(ACSdata.env$env.ben.created.black,na.rm = T)/sum(ACSdata.env$Number.POP.black),sum(ACSdata.env$env.ben.created.latino,na.rm = T)/sum(ACSdata.env$Number.POP.latino),sum(ACSdata.env$env.ben.created.asian,na.rm = T)/sum(ACSdata.env$Number.POP.asian),sum(ACSdata.env$env.ben.created.whitealone,na.rm = T)/sum(ACSdata.env$Number.POP.whitealone))
total.benefit.shares$pc.received <- c(sum(ACSdata.env$env.ben.black)/sum(ACSdata.env$Number.POP.black),sum(ACSdata.env$env.ben.latino)/sum(ACSdata.env$Number.POP.latino),sum(ACSdata.env$env.ben.asian)/sum(ACSdata.env$Number.POP.asian),sum(ACSdata.env$env.ben.whitealone)/sum(ACSdata.env$Number.POP.whitealone))

colnames(total.benefit.shares) <- c("Race","Share Created","Share Received","Per capita created","Per capita received")
byLine <- do.call("c", strsplit(capture.output(stargazer(total.benefit.shares[,1:3], summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- sub(" ccc", " lcc", byLine)
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"total_benefit_shares.tex"))

#total benefits by race
total.benefits.by.race <- as.data.frame((c("Black","Hispanic/Latinx","Asian","White")))
total.benefits.by.race$Current <- c(sum(ACSdata.env$env.ben.black),sum(ACSdata.env$env.ben.latino),sum(ACSdata.env$env.ben.asian),sum(ACSdata.env$env.ben.whitealone))

# calculate benefits created and received by different income deciles
# group by income decile and then summarize using sum

total.benefits.by.income.decile <- ACSdata.env %>%
  mutate(env.ben.created = env.ben.created.PC*Number.POP.x) %>%
  group_by(decile) %>%
  summarize(env.ben.created = sum(env.ben.created,na.rm=TRUE),env.ben.received = sum(env.ben)) %>%
  mutate(share.created = round(env.ben.created/total.benefits,3), share.received = round(env.ben.received/total.benefits,3))

total.benefit.shares.by.income.decile <- total.benefits.by.income.decile %>%
  dplyr::select(decile,share.created,share.received) %>%
  rename("Income Decile" = decile, "Share Created" = share.created, "Share Received" = share.received)
byLine <- do.call("c", strsplit(capture.output(stargazer(total.benefit.shares.by.income.decile, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- sub(" ccc", " lcc", byLine)
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"total_benefit_shares_by_decile.tex"))

#Instead, could plot benefits created and received as a "generalized Lorenz curve" where the x-axis is ranked by income
total.benefits.by.income <- ACSdata.env %>%
  mutate(env.ben.created = env.ben.created.PC*Number.POP.x) %>%
  mutate(share.created = env.ben.created/total.benefits, share.received = env.ben/total.benefits) %>%
  arrange(Median.HH) %>%
  mutate(cum.share.created = cumsum(replace_na(share.created,0)), cum.share.received = cumsum(share.received),
         cum.pop.share = cumsum(Number.POP.x)/max(cumsum(Number.POP.x)))
pdf(file.path(FIGURES.OUT, 'Lorenz_curves_shares_created_received.pdf'), width=8, height=4)
ggplot() + 
  geom_line(data=total.benefits.by.income, aes(x=cum.pop.share, y=cum.share.created, color="Share Created")) + 
  geom_line(data=total.benefits.by.income, aes(x=cum.pop.share, y=cum.share.received, color="Share Received")) +
  geom_segment(aes(x=0,y=0,xend=1,yend=1)) +
  scale_color_manual("",
                     breaks = c("Share Created","Share Received"),
                     values = c("blue","cyan")) +
  xlab("Cumulative fraction of population") + ylab("Cumulative shares") +
  theme_minimal() + 
  theme(legend.position = c(0.2,0.8), legend.title = element_blank(),legend.background = element_blank(),legend.box.background = element_rect(color = "black"))
dev.off()

#Read in incentive composition calculations
incentive_composition <- read_xlsx(file.path(DATA.SOLAR.SITING,"Solar_Incentives_Data.xlsx"),
                                   sheet="Sheet1",n_max=52) %>% 
  select(State,`fraction of subsidy that is federal`,`fraction of subsidy that is NEM`,`fraction of subsidy that is state, non-NEM`) %>% 
  rename(frac_fed = `fraction of subsidy that is federal`,
         frac_NEM = `fraction of subsidy that is NEM`,
         frac_state = `fraction of subsidy that is state, non-NEM`) %>% 
  left_join(select(read_csv(file.path(DATA.PATH,"stateFIPS.txt")),PostalCode,FIPS),by=c("State"="PostalCode")) %>% 
  filter(!State%in%c("AK","HI"))

# Calculate Incentives Received by Income Decile
incentives.by.income.decile <- ACSdata.env %>%
  left_join(incentive_composition,by=c("stateFIPS"="FIPS")) %>% select(-State) %>% 
  mutate(incentive.received = num_systems.PC*Number.POP.x*subsidy_value,
         incentive.received.fed = incentive.received*frac_fed,
         incentive.received.nem = incentive.received*frac_NEM,
         incentive.received.state = incentive.received*frac_state) %>% 
  group_by(decile) %>%
  summarize(incentive.received = sum(incentive.received,na.rm=TRUE),
            incentive.received.fed = sum(incentive.received.fed,na.rm=TRUE),
            incentive.received.nem = sum(incentive.received.nem,na.rm=TRUE),
            incentive.received.state = sum(incentive.received.state,na.rm=TRUE)) %>%
  mutate(share.incentives = round(incentive.received/sum(incentive.received),3),
         share.incentives.fed = round(incentive.received.fed/sum(incentive.received.fed),3),
         share.incentives.nem = round(incentive.received.nem/sum(incentive.received.nem),3),
         share.incentives.state = round(incentive.received.state/sum(incentive.received.state),3))
incentive.shares.by.income.decile <- incentives.by.income.decile %>%
  select(decile,share.incentives,share.incentives.fed,share.incentives.nem,share.incentives.state) %>%
  rename("Income Decile" = decile, "Total" = share.incentives, "Federal" = share.incentives.fed,
         "NEM" = share.incentives.nem, "State" = share.incentives.state)
byLine <- do.call("c", strsplit(capture.output(stargazer(incentive.shares.by.income.decile, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- sub(" ccccc", " lcccc", byLine)
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"incentive_shares_by_decile_wsize.tex"))

sum(incentives.by.income.decile$incentive.received)
sum(incentives.by.income.decile$incentive.received.fed)
sum(incentives.by.income.decile$incentive.received.nem)
sum(incentives.by.income.decile$incentive.received.state)
